import os

from llama_index.core.node_parser import SimpleNodeParser
from llmsherpa.readers import LayoutPDFReader

from config import HF_ENDPOINT, PROJECT_DIR

# Echo the HF_ENDPOINT value imported from config so the setting in effect
# is visible in the script output.
# NOTE(review): presumably the Hugging Face endpoint/mirror URL - confirm in config.
print(HF_ENDPOINT)

from llama_index.readers.smart_pdf_loader import SmartPDFLoader

# --- Settings ---------------------------------------------------------------
# Parse endpoint of the llmsherpa service; the same URL is handed to both
# readers below.
llmsherpa_api_url = "http://172.16.14.116:5001/api/parseDocument?renderFormat=all"
# NOTE(review): despite the name this is a relative filesystem path, so it
# presumably depends on the directory the script is launched from - confirm.
pdf_url = "../data/files/A3301010060400242001281.pdf"

# --- Step 1: parse the PDF directly with llmsherpa --------------------------
pdf_reader = LayoutPDFReader(parser_api_url=llmsherpa_api_url)
docs = pdf_reader.read_pdf(pdf_url)

# Dump both renderings of the parsed document for inspection.
plain_text = docs.to_text()
print(plain_text)
html_markup = docs.to_html()
print(html_markup)

# --- Step 2: load the same PDF through the LlamaIndex wrapper ---------------
pdf_loader = SmartPDFLoader(llmsherpa_api_url=llmsherpa_api_url)
documents = pdf_loader.load_data(pdf_url)
print(documents)

# --- Step 3: split the loaded documents into nodes --------------------------
node_parser = SimpleNodeParser.from_defaults(chunk_size=1024)
base_nodes = node_parser.get_nodes_from_documents(documents)
print(f"end:{base_nodes}")